package core;
import java.io.File;
import java.util.ArrayList;
import java.util.List;


/**
 * Holds the documents loaded from a dataset directory.
 *
 * <p>Every directory entry whose name contains {@code ".txt"} is wrapped in a
 * {@link Document}, has {@code extraiFrases()} called on it, and is stored in
 * {@link #listaDocumentos}.
 */
public class Corpus {

	/** Documents produced by the most recent successful {@link #carregaDataset(String)} call. */
	public Document[] listaDocumentos;

	/**
	 * Creates a corpus and immediately loads it from the given directory.
	 *
	 * @param dirData path of the directory containing the dataset files
	 * @throws IllegalArgumentException if {@code dirData} is not a readable directory
	 * @throws Exception if creating or processing any document fails
	 */
	public Corpus(String dirData) throws Exception{
		carregaDataset(dirData);
	}

	/**
	 * Loads every matching file of the given directory into {@link #listaDocumentos},
	 * replacing any previously loaded documents.
	 *
	 * <p>The field is only assigned after all documents were processed, so a failure
	 * midway leaves the previous value untouched instead of a partially filled array.
	 *
	 * @param dataset path of the directory containing the dataset files
	 * @throws IllegalArgumentException if {@code dataset} is not a directory or cannot be listed
	 * @throws Exception if creating or processing any document fails
	 */
	public void carregaDataset(String dataset) throws Exception{
		File folder = new File(dataset);
		File[] entries = folder.listFiles();

		// listFiles() returns null (rather than an empty array) when the path is
		// not a directory or an I/O error occurs; fail with a message naming the path.
		if (entries == null) {
			throw new IllegalArgumentException(
					"Dataset path is not a readable directory: " + folder.getAbsolutePath());
		}

		List<Document> carregados = new ArrayList<Document>(entries.length);
		for (File entry : entries) {
			if (!isArquivoTexto(entry)) {
				continue;
			}
			// Documents are built from the absolute form of the path.
			Document documento = new Document(entry.getAbsoluteFile());
			documento.extraiFrases();
			carregados.add(documento);
		}

		listaDocumentos = carregados.toArray(new Document[0]);
	}

	/**
	 * Tells whether a directory entry should be loaded as a document.
	 *
	 * <p>NOTE(review): this is a substring match, so names such as {@code a.txt.bak}
	 * also qualify; confirm whether a strict suffix match was intended.
	 */
	private static boolean isArquivoTexto(File entry) {
		return entry.getName().contains(".txt");
	}

}
